#include <iostream>
#include <string>
#include <sstream>
#include <strstream>
#include <fstream>
#include <map>
#include <vector>
#include <cstdlib>
#include <cstdio>

using namespace std;

namespace {

// Vocabulary of one side of the parallel text: word <-> id mapping plus the
// number of occurrences of every word.  Ids 0 and 1 are never assigned to a
// word (both tables start with two empty placeholder entries), so the first
// word seen receives id 2.
struct Vocabulary {
  std::map<std::string,int> ids;     // word -> id
  std::vector<std::string> words;    // id -> word
  std::vector<int> counts;           // id -> number of occurrences

  Vocabulary() : words(2), counts(2,0) {}
};

// Prints the command-line synopsis to stderr.
void printUsage(const char* program)
{
  std::cerr << program << " txt1 txt2 [txt3 txt4 -weight w -vcb1 output1.vcb -vcb2 output2.vcb -snt1 output1_output2.snt -snt2 output2_output1.snt]\n";
  std::cerr << " Converts plain text into GIZA++ snt-format.\n";
}

// Returns true when `text` ends with `suffix`.
bool endsWith(const std::string& text, const std::string& suffix)
{
  return text.length()>=suffix.length() &&
         text.compare(text.length()-suffix.length(),suffix.length(),suffix)==0;
}

// Returns the part of `path` after the last directory separator.  A path that
// ends in a separator (or contains none) is returned unchanged.
std::string baseName(const std::string& path)
{
  // rfind() yields npos when nothing is found; npos+1 wraps around to 0.
  std::string::size_type start=path.rfind('/')+1;
#ifdef WIN32
  if( start==0 ) start=path.rfind('\\')+1;
#endif
  if( start>=path.length() ) start=0;
  return path.substr(start);
}

// Splits `line` on whitespace, registers every token in `vocab` (assigning a
// fresh id to unseen words and incrementing the occurrence count) and returns
// the ids of the tokens in order.
std::vector<int> indexLine(const std::string& line,Vocabulary& vocab)
{
  std::vector<int> sentence;
  std::istringstream tokens(line);
  std::string word;
  while( tokens>>word ) {
    int id;
    std::map<std::string,int>::const_iterator known=vocab.ids.find(word);
    if( known==vocab.ids.end() ) {
      id=static_cast<int>(vocab.words.size());
      vocab.ids[word]=id;
      vocab.words.push_back(word);
      vocab.counts.push_back(0);
    } else {
      id=known->second;
    }
    ++vocab.counts[id];
    sentence.push_back(id);
  }
  return sentence;
}

// Writes one sentence as space-terminated ids followed by a newline.
void writeIds(std::ostream& out,const std::vector<int>& sentence)
{
  for(std::vector<int>::size_type j=0; j<sentence.size(); ++j)
    out << sentence[j] << ' ';
  out << '\n';
}

// Writes one "id word count" line per word, starting at id 2.
void writeVocabulary(std::ostream& out,const Vocabulary& vocab)
{
  for(std::vector<std::string>::size_type i=2; i<vocab.words.size(); ++i)
    out << i << ' ' << vocab.words[i] << ' ' << vocab.counts[i] << '\n';
}

} // namespace

// Converts pairs of plain-text files (one sentence per line, whitespace
// separated tokens) into two vocabulary files and two sentence files.
//
// Command line: txt1 txt2 [txt3 txt4 ...] [-weight w]... [-vcb1 file]
//               [-vcb2 file] [-snt1 file] [-snt2 file]
//   * positional arguments are consumed in pairs (first side, second side);
//   * the k-th -weight value is applied to the k-th file pair, pairs without
//     a weight use 1.0;
//   * output names default to names derived from the first file pair (a
//     common ".tok" or ".txt" extension is stripped first).
//
// Returns 0 on success and 1 on a usage error or when an output file cannot
// be opened.
int main(int argc,char**argv)
{
  std::string snt1,snt2,vcb1,vcb2;
  std::vector<double> weights;
  std::vector<std::string> filenames;

  for(int i=1; i<argc; ++i) {
    const std::string arg(argv[i]);
    const bool takesValue= arg=="-weight"||arg=="-snt1"||arg=="-snt2"||arg=="-vcb1"||arg=="-vcb2";
    if( !takesValue ) {
      filenames.push_back(arg);
      continue;
    }
    // Guard against an option given as the very last argument: argv[argc] is
    // a null pointer and must not be handed to atof()/std::string.
    if( i+1>=argc ) {
      std::cerr << "ERROR: option " << arg << " requires a value.\n";
      printUsage(argv[0]);
      return 1;
    }
    const char* value=argv[++i];
    if( arg=="-weight" )
      weights.push_back(std::atof(value));
    else if( arg=="-snt1" )
      snt1=value;
    else if( arg=="-snt2" )
      snt2=value;
    else if( arg=="-vcb1" )
      vcb1=value;
    else
      vcb2=value;
  }

  if( filenames.empty()||(filenames.size()%2)==1 ) {
    printUsage(argv[0]);
    return 1;
  }

  // Derive the default output names from the first file pair.  The extension
  // is only stripped when both files carry the same one (".tok" or ".txt")
  // and something is left of the name afterwards.
  std::string w1(filenames[0]);
  std::string w2(filenames[1]);
  if( w1.length()>4&&w2.length()>4&&
      ((endsWith(w1,".tok")&&endsWith(w2,".tok"))||
       (endsWith(w1,".txt")&&endsWith(w2,".txt"))) ) {
    w1=w1.substr(0,w1.length()-4);
    w2=w2.substr(0,w2.length()-4);
    std::cerr << "w1:"<< w1 << " w2:" << w2 << std::endl;
  }

  std::string vocab1(w1),vocab2(w2);
  const std::string vocab1x(baseName(vocab1));
  std::cout << vocab1 << " -> " << vocab1x << std::endl;
  const std::string vocab2x(baseName(vocab2));
  std::cout << vocab2 << " -> " << vocab2x << std::endl;

  if( snt1.empty() )
    snt1=vocab1+"_"+vocab2x+".snt";
  if( snt2.empty() )
    snt2=vocab2+"_"+vocab1x+".snt";
  if( vcb1.empty() )
    vocab1+=".vcb";
  else
    vocab1=vcb1;
  if( vcb2.empty() )
    vocab2+=".vcb";
  else
    vocab2=vcb2;

  std::ofstream ovocab1(vocab1.c_str()),ovocab2(vocab2.c_str()),osnt1(snt1.c_str()),osnt2(snt2.c_str());
  // Without its output files the run cannot produce anything useful, so
  // report every file that failed to open and stop.
  bool outputsOk=true;
  if( !ovocab1 ) { std::cerr << "ERROR: " << vocab1 << " cannot be written.\n"; outputsOk=false; }
  if( !ovocab2 ) { std::cerr << "ERROR: " << vocab2 << " cannot be written.\n"; outputsOk=false; }
  if( !osnt1 ) { std::cerr << "ERROR: " << snt1 << " cannot be written.\n"; outputsOk=false; }
  if( !osnt2 ) { std::cerr << "ERROR: " << snt2 << " cannot be written.\n"; outputsOk=false; }
  if( !outputsOk )
    return 1;

  Vocabulary voc1,voc2;
  std::string line1,line2;
  for(std::vector<std::string>::size_type i=0; i<filenames.size(); i+=2) {
    const std::string& name1=filenames[i];
    const std::string& name2=filenames[i+1];
    std::ifstream i1(name1.c_str()),i2(name2.c_str());
    const bool bothReadable= i1&&i2;
    if(!i1)std::cerr << "WARNING: " << name1 << " cannot be read.\n";
    if(!i2)std::cerr << "WARNING: " << name2 << " cannot be read.\n";

    // The weight belongs to the file pair, not to the individual sentence.
    const double w= (i/2<weights.size()) ? weights[i/2] : 1.0;

    bool more1=false,more2=false;
    for(;;) {
      more1=!std::getline(i1,line1).fail();
      more2=!std::getline(i2,line2).fail();
      if( !more1||!more2 )
        break;

      const std::vector<int> t1(indexLine(line1,voc1));
      const std::vector<int> t2(indexLine(line2,voc2));
      if( !t1.empty()&&!t2.empty() ) {
        // Each sentence pair is written in both directions.
        osnt1 << w << "\n";
        writeIds(osnt1,t1);
        writeIds(osnt1,t2);

        osnt2 << w << "\n";
        writeIds(osnt2,t2);
        writeIds(osnt2,t1);
      } else
        std::cerr << "WARNING: filtered out empty sentence (source: " << name1 << " " << t1.size() <<
                  " target: " << name2 << " " << t2.size() << ").\n";
    }
    // Exactly one side still had a line: the files are not parallel.
    if( bothReadable&&more1!=more2 )
      std::cerr << "WARNING: " << name1 << " and " << name2
                << " have different numbers of lines; surplus lines were ignored.\n";
  }

  writeVocabulary(ovocab1,voc1);
  writeVocabulary(ovocab2,voc2);
  return 0;
}
